/*==============================================================================
United States labor market data

Outline: 
I. 	Clean & assemble U.S. state-level data from the following sources: 
	(1) 	March CPS, 1968-2012. Source: IPUMS-CPS
	(2) 	BLS LAUS, 1976-2012. Source: ftp://ftp.bls.gov/pub/time.series/la/
	(3) 	BEA Regional Economic Accounts, 1969-2011. Source: http://www.bea.gov/regional/downloadzip.cfm
		(NOTE: no BEA section appears in the code below.)
	(4) 	U.S. Census Intercensal Population Estimates
		(NOTE: the corresponding section header in the code below is numbered "(3)".)
II. 	Combine all data and prepare for appending to European data and merging with WVS 
	(a) 	Combine all data		
	(b)	Create geographic regions for merge with WVS
==============================================================================*/

clear 
set more off

cd "$insheet_files/USA"

* ==============================================================================
*(1) 	March CPS, 1968-2012. Source: IPUMS-CPS
* ==============================================================================
* Builds weighted state-by-year person counts from the March CPS extract and
* saves them as US_marchcps_1968_2012.dta in the dta_files directory.

* The extract is kept compressed in a fixed scratch directory: it is
* uncompressed here and compressed again once the counts have been saved.
cd "/scratch/public/hysteresis_files/"

! uncompress "/scratch/public/hysteresis_files/cps_00148.dat.Z"
* Presumably the extract's own do-file, which reads cps_00148.dat into memory
* (TODO confirm); the code below relies on it providing empstat, age, sex,
* wtsupp, year and statefip.
do cps_00148.do

* Indicator variables: 1 when the condition holds, missing otherwise, so that
* the weighted sum in the collapse below is a weighted head count.

*		Population (every record counts once)
gen POP_marchcps 		= 1 
*		No. Unemployed (empstat 20-22, age 15 and over)
gen UNEMP_marchcps 		= 1 if (empstat>=20 & empstat<=22) & age>=15
*		No. Employed (empstat 10-13, age 15 and over)
gen EMP_marchcps 		= 1 if (empstat>=10 & empstat<=13) & age>=15
*		No. Employed with sex==2 (stored as ts_EMP_female)
gen ts_EMP_female 		= 1 if (empstat>=10 & empstat<=13) & age>=15 & sex==2


*		Collapse by year-state
* Negative weights are set to zero so those records add nothing to the sums.
replace wtsupp=0 if wtsupp<0

* Full variable names are used (rather than the abbreviations POP, EMP, UNEMP)
* so the command does not depend on variable-name abbreviation being enabled
* or on no other variable sharing those prefixes.
collapse (sum) POP_marchcps EMP_marchcps UNEMP_marchcps ts_EMP_female [pw=wtsupp], by(year statefip)

* Labor force = unemployed + employed
gen LF_marchcps	= UNEMP_marchcps + EMP_marchcps

save "$dta_files/US_marchcps_1968_2012.dta", replace

! compress cps_00148.dat

* ==============================================================================
*(2) 	BLS LAUS, 1976-2012. Source: ftp://ftp.bls.gov/pub/time.series/la/
* ==============================================================================

* Builds an annual state-level panel (unemployment rate, unemployment,
* employment, labor force) from the monthly BLS LAUS state files.

clear
set more off
cd "$insheet_files/USA"

unzipfile bls_laus, replace

cd "$insheet_files/USA/bls_laus"

* Pass 1: read state7.txt ... state59.txt (number 55 is skipped), force value
* to numeric (non-numeric entries become missing), and save each file to its
* own tempfile.
forvalues x=7/59 {

	clear
	if `x'!=55 {
		quietly insheet using state`x'.txt
		quietly destring value, replace force
		
		tempfile temp`x'
		quietly save `temp`x''
	}
}

* Pass 2: file 59 is still in memory after pass 1, so only files 7-58 are
* appended to it here.
forvalues x=7/58 {

if `x'!=55 {
	quietly append using `temp`x''
	}
}

* Keep only series whose id starts with LAUST
gen series_name = substr(series_id,1,5) 
keep if series_name=="LAUST" //non-SA state-level series

* State code = characters 6-7 of the series id
gen statefip = substr(series_id,6,2) 
destring statefip, replace

* Month = characters 2-3 of period.
* NOTE(review): if period also carries an annual-average code (e.g. M13), that
* row is averaged together with the monthly rows in the collapse below - confirm.
gen month = substr(period,2,2) 
destring month, replace
drop period 

* Measure code = 13th character of the series id
gen series = substr(series_id,13,1)
destring series, replace
#delim ;
label define measure_code
03	"unemployment rate"
04	"unemployment"
05	"employment"
06	"labor force" ;
#delim cr
label values series measure_code

drop series_name series_id footnote

* One row per state-year-month, one value column per measure code
reshape wide value, i(statefip year month) j(series)

rename value3 ue_rate_bls_laus
rename value4 UNEMP_bls_laus
rename value5 EMP_bls_laus
rename value6 LF_bls_laus

* Annual means. The four variables are listed explicitly (instead of the
* abbreviated range ue_rate-LF_bls) so the command does not depend on column
* order or on variable-name abbreviation.
collapse (mean) ue_rate_bls_laus UNEMP_bls_laus EMP_bls_laus LF_bls_laus, by(statefip year)

sort statefip year
drop if year==2013

* Remove the unzipped text files. erase is used because rm is only available
* on Unix and Mac; capture keeps the loop going for numbers with no file.
cd "$insheet_files/USA/bls_laus"
forvalues s = 1/59 {
	capture erase state`s'.txt  
}

* Park the annual BLS LAUS panel in a tempfile for the merge further down.
* The tempfile macro is referenced by exactly the name declared (no .dta
* inside the macro quotes) and quoted in case the temp path contains spaces.
tempfile US_bls_laus_1976_2012
save 	 "`US_bls_laus_1976_2012'"

* ==============================================================================
*(3) 	U.S. Census Intercensal Population Estimates
* ==============================================================================
clear
cd "$insheet_files/USA/"
import excel "US Census Population Estimates.xls", firstrow cellrange(A2:AS53) sheet("POP Processed")

* Wide (one y-prefixed column per year) to long (one row per state-year)
reshape long y, i(statefip state) j(year)
rename y POP_census //note: POP already in thousands

tempfile US_census_state_pop
save "`US_census_state_pop'"

* ==============================================================================
*II. 	Combine data 
* ==============================================================================
set more off

*(a) 	Combine data

* Start from the March CPS counts and add the BLS and Census columns by
* state-year. nogen drops the _merge indicator; unmatched rows from either
* side are kept.
use "$dta_files/US_marchcps_1968_2012.dta", clear
merge 1:1 statefip year using "`US_bls_laus_1976_2012'", nogen
merge 1:1 statefip year using "`US_census_state_pop'", nogen

*Note: the merge is not perfect because, before 1977, the March CPS lists 
* some states only as part of composite regions. 

*(b)	Create geographic regions for merge with WVS

gen country = "US"
drop if statefip == 43 // drop Puerto Rico

* Assign each statefip code to its WVS region code. Codes 61 and above are the
* composite state groups that the March CPS reports before 1977; they are
* mapped to a region as well. Codes not listed below keep region missing.
* NOTE(review): code 72 is placed in West North Central here, while the nuts
* labelling later in this file maps code 72 to "US: PR" - confirm that no
* Puerto Rico rows carry statefip 72 at this point.
gen region = .
* New England
replace region = 840001 if inlist(statefip, 9, 23, 25, 33, 44, 50, 61, 70, 81)
* Middle Atlantic States
replace region = 840002 if inlist(statefip, 34, 36, 42)
* South Atlantic
replace region = 840003 if inlist(statefip, 10, 11, 12, 13, 14, 24, 37, 45, 51, 54, 74, 75, 83, 90)
* East South Central
replace region = 840004 if inlist(statefip, 1, 21, 28, 47, 76, 84)
* West South Central
replace region = 840005 if inlist(statefip, 5, 22, 40, 48, 77, 85)
* East North Central
replace region = 840006 if inlist(statefip, 17, 18, 26, 39, 55, 71)
* West North Central
replace region = 840007 if inlist(statefip, 19, 20, 27, 29, 31, 38, 46, 69, 72, 73, 87)
* Rocky Mountain state
replace region = 840008 if inlist(statefip, 4, 8, 16, 30, 32, 35, 49, 56, 65, 78, 79, 89)
* Northwest
replace region = 840009 if inlist(statefip, 2, 15, 41, 53, 68, 80, 88)
* California
replace region = 840010 if statefip == 6

* Value labels for the region codes
label define region_wvs_labels ///
	840001 "US: New England" ///
	840002 "US: Middle Atlantic States" ///
	840003 "US: South Atlantic" ///
	840004 "US: East South Central" ///
	840005 "US: West South Central" ///
	840006 "US: East North Central" ///
	840007 "US: West North Central" ///
	840008 "US: Rocky Mountain state" ///
	840009 "US: Northwest" ///
	840010 "US: California"

label values region region_wvs_labels

*===============================================================================
* Gen U.S. NUTS equivalent = States 
*===============================================================================

* Builds one stacked dataset: state rows, WVS-region totals, and a national
* total (region 0). Tempfile macros are referenced by exactly the name
* declared (no .dta inside the macro quotes) and quoted in case the temp path
* contains spaces.

* Snapshot of the data in memory; both collapses below start from it.
tempfile precollapse
save "`precollapse'" 

* State-level rows
keep statefip year country POP* LF* EMP* UNEMP* ts_EMP_female

tempfile US_states
save "`US_states'" 

*Collapse Regions
use "`precollapse'", clear
collapse (sum) POP* LF* EMP* UNEMP* ts_EMP_female (first) country, by(year region)
sort year region

tempfile US_regions
save "`US_regions'"

*Collapse Entire US
use "`precollapse'", clear
collapse (sum) POP* LF* EMP* UNEMP* ts_EMP_female (first) country, by(year)
* Region code 0 marks the national total
gen region= 0

tempfile US_all
save "`US_all'"

use "`US_states'", clear

* Inconsistency in CT data before 1972
* (applied to the state rows only; the regional and national totals above were
* already computed from the unmodified data)
 foreach var of varlist *_marchcps {
	 replace `var'=. if statefip == 9 & year<=1972
 }

drop if statefip>=61 //before 1977 some states only have data in March CPS as part of composite regions. This drops those composite regions.
drop if statefip==43 //drop Puerto Rico

append using "`US_regions'"
append using "`US_all'"

* Replace "0" from previous collapse with "." 
* (collapse (sum) returns 0 when every input is missing)
foreach var of varlist POP* LF* EMP* UNEMP* ts_EMP_female {
	replace `var' =. if `var' ==0 
}

* nuts is the string identifier of each row: a state abbreviation for state
* rows, a region name for regional totals, "Entire U.S." for the national row.

* Start from the numeric code: statefip for state rows, region otherwise
gen nuts = statefip
replace nuts = region if statefip == .

tostring nuts, replace force

* Pairs of (numeric code, two-letter abbreviation), in alphabetical order of
* the abbreviation.
local fips_to_usps ///
	2 AK   1 AL   5 AR  60 AS   4 AZ   6 CA   8 CO   9 CT  11 DC  10 DE ///
	12 FL  13 GA  66 GU  15 HI  19 IA  16 ID  17 IL  18 IN  20 KS  21 KY ///
	22 LA  25 MA  24 MD  23 ME  26 MI  27 MN  29 MO  28 MS  30 MT  37 NC ///
	38 ND  31 NE  33 NH  34 NJ  35 NM  32 NV  36 NY  39 OH  40 OK  41 OR ///
	42 PA  72 PR  44 RI  45 SC  46 SD  47 TN  48 TX  49 UT  51 VA  78 VI ///
	50 VT  53 WA  55 WI  54 WV  56 WY

* Walk the list two tokens at a time: token i is the code, token i+1 the
* abbreviation.
local n_tokens : word count `fips_to_usps'
forvalues i = 1(2)`n_tokens' {
	local j = `i' + 1
	local code : word `i' of `fips_to_usps'
	local usps : word `j' of `fips_to_usps'
	replace nuts = "US: `usps'" if nuts == "`code'"
}

* National total
replace nuts = "Entire U.S." if region == 0

* Regional totals: names are listed in region-code order, 840001 to 840010
local wvs_code = 840000
foreach wvs_name in "New England" "Middle Atlantic States" "South Atlantic" ///
	"East South Central" "West South Central" "East North Central" ///
	"West North Central" "Rocky Mountain state" "Northwest" "California" {
	local ++wvs_code
	replace nuts = "US: `wvs_name'" if region == `wvs_code'
}

********************************************************************************
	
* Combined U.S. series: take the BLS LAUS value and substitute the March CPS
* value for years up to and including 1975.
* The new variables are created as double: gen defaults to float, which cannot
* hold the double-precision sums and means exactly (integers above 16,777,216
* are rounded). The BLS variables are named in full rather than abbreviated.
foreach stat in LF EMP UNEMP {
	gen double `stat'_US = `stat'_bls_laus
	replace `stat'_US = `stat'_marchcps if year <= 1975
}
	
keep nuts year POP* LF_US EMP_US UNEMP_US ts_EMP_female

*Report data in thousands
* (POP_census is left out of this list, so it is not rescaled)
foreach var of varlist POP_marchcps LF_US EMP_US UNEMP_US ts_EMP_female {
	replace `var'=`var'/1000
}

save "$dta_files/US_labor", replace 
